
*************************************************************************************************
/*																								
    Purpose: 																
    -Clean nls older men 1966 												
    -Attach 1950 ANES occ codes to nls occupations   						
																								
    Creates: nls_older_cleaned.dta											
    Note: Sample is men aged 45-50.											
*/																								
*************************************************************************************************

* Session setup: start with no data in memory, turn off output paging, and
* work relative to the NLS Older Men source folder (the Mydirectory1 global
* must already be defined by whoever runs this file).
clear 
set more off 

cd "$Mydirectory1/1_DataSources/NLS_older_men/"

*---------------------------------------------------------------------------------------------* 
*---------------------------------------------------------------------------------------------* 

****************************************
* ADD R'S CLASS OF WORKER TO MAIN DATASET *
****************************************
//prepare the class-of-worker variable (taken from the nationality/class-of-worker extract) to be added to main dataset later

* Load the nationality/class-of-worker extract and restore the upper-case
* "R" prefix on variable names before running the NLS-generated label file.
insheet using "./rawdata/Rnat_classwrker_formobility.csv", name 
rename r* R* 

do "./rawdata/Rnat_classwrker_formobility-value-labels.do"  //NLS-generated clean-up file 

* Only the respondent ID and class of worker are carried forward.
keep R0000100 R0053000 
rename (R0000100 R0053000) (id_nlsom class_wkr) 

* Code -4 is treated as missing.
replace class_wkr = . if class_wkr == -4 

* Park the result in a tempfile (tempie) that is merged into the main data later.
sort id_nlsom 
tempfile tempie 
save `tempie', replace 
clear 

*---------------------------------------------------------------------------------------------* 
*---------------------------------------------------------------------------------------------* 

***********************************************
* ADD # OF KIDS LIVING WITH R TO MAIN DATASET *
***********************************************
//prepare hh size variables to be added to main dataset later

* Load the number-of-kids extract and restore the upper-case "R" prefix so
* the NLS-generated label file can be run.
insheet using "./rawdata/numkids_inhh.csv", name 
rename r* R* 

do "./rawdata/numkids_inhh-value-labels.do" //NLS-generated clean-up file 

* Replace NLS reference numbers with descriptive names (single grouped rename).
rename (R0050000 R0050100 R0050200 R0050500 R0050300 R0050400 R0050600 R0050700 R0000100) ///
       (R_numkids_living R_totnumkids_livinginhh R_numkids_lessthan14_livinginhh  ///
        R_totnumkids_0to17_livinginhh R_numkids_14to17_livinginhh                  ///
        R_numkids_18to22_livinginhh R_numkids_23plus_livinginhh                    ///
        R_numkids_ageNA_livinginhh id_nlsom)

global list "R_numkids_living R_totnumkids_livinginhh R_numkids_lessthan14_livinginhh R_totnumkids_0to17_livinginhh R_numkids_14to17_livinginhh R_numkids_18to22_livinginhh R_numkids_23plus_livinginhh R_numkids_ageNA_livinginhh"

* Negative codes (refusal, DK, valid skip, non-interview) become missing.
* Each variable is tabulated before and after so the recode shows in the log.
foreach kidvar in $list {
	tab `kidvar', m
	replace `kidvar' = . if `kidvar' < 0
	tab `kidvar', m
}

/*Note: R_numkids_living is supposed to be topcoded at 15, but a couple of
        "16" and "17" values exist. They are folded into the topcode (15).*/
tab R_numkids_living, m
replace R_numkids_living = 15 if inlist(R_numkids_living, 16, 17)
tab R_numkids_living, m

* Indicator for the topcoded value (missing when the count itself is missing).
gen flag_15plus_kidsliving = (R_numkids_living == 15) if !missing(R_numkids_living)
tab flag_15plus_kidsliving, m
label var flag_15plus_kidsliving "Dummy =1 if R has an indeterminate (15+) # of living kids"

* Park the result in a tempfile (tempie2) that is merged into the main data later.
sort id_nlsom 
tempfile tempie2 
save `tempie2', replace 
clear 

*---------------------------------------------------------------------------------------------* 
*---------------------------------------------------------------------------------------------* 


************************************* 
*    BRING IN MAIN DATASET          * 
************************************* 

insheet using "./rawdata/nls66_for_mobility.csv", name 

* Re-prefix every variable that starts with lower-case r with an upper-case R
* (keeping the 7 characters that follow the prefix) so that the NLS clean-up
* file below can be run. Old name, kept characters and new name are echoed.
foreach lowname of varlist r* { 
	di "`lowname'" 
	local digits = substr("`lowname'", 2, 7) 
	di "`digits'" 
	local upname = "R`digits'" 
	di "`upname'" 
	rename `lowname' `upname' 
} 

do "./rawdata/nls66_for_mobility-value-labels.do" //NLS-provided clean up for main dataset

*** Weight info 
    *R0000200 is the appropriate cross-sectional sampling weight. 

/*------------------------------------------------------------------------------------------
	CLEANING
------------------------------------------------------------------------------------------*/

****Rename variables 
* Respondent-level variables: NLS reference number -> descriptive name.
rename (R0000100 R0000200 R0002200 R0002201 R0002203 R0002300 R0002400 R0002451)  ///
       (id_nlsom sampling_wght_nls age month_birth year_birth race marital_status ///
        region_residence)
rename (R0006400 R0019300 R0019400 R0024200 R0024800 R0025800)                    ///
       (R_occupation higrade_attended Rcomplete_higrade_attended r_income         ///
        employment_status1965 inc1965_veteranscomp)
rename (R0028400 R0029000 R0029700 R0029800 R0029900)                             ///
       (Rmove_sincebirth Rfather_birthplace person_Rlivedwithage15 headofhh_occ   ///
        headofhh_higrade)

* Household-roster variables for family members 1-20. The reference numbers
* are evenly spaced: member k has relationship-to-R at 32900 + 800*(k-1),
* highest grade at +300 and grade-completed at +400 from that base
* (e.g. member 1: R0032900 / R0033200 / R0033300; member 20: R0048100 /
* R0048400 / R0048500).
forvalues k = 1/20 {
	local base    = 32900 + 800*(`k' - 1)
	local relnum  = string(`base',       "%07.0f")
	local gradnum = string(`base' + 300, "%07.0f")
	local compnum = string(`base' + 400, "%07.0f")
	rename R`relnum'  relationshiptoR_fam`k'
	rename R`gradnum' higrade_fam`k'
	rename R`compnum' complete_higrade_fam`k'
}

* Remaining respondent-level variables.
rename (R0052500 R0056400 R0057500 R0057520)                                      ///
       (employment_status1966 higrade_completed net_fam_inc totfam_inc)

*Merge in the class-of-worker variable (saved earlier in the tempie tempfile).
*The nogenerate option stands in for creating and then dropping _merge.
sort id_nlsom 
merge 1:1 id_nlsom using `tempie', nogenerate 

*Merge in the number-of-kids / hh size variables (saved earlier in tempie2).
sort id_nlsom 
merge 1:1 id_nlsom using `tempie2', nogenerate 


***************************************************************************** 
* Obtain father occupation and mother occupation from headofhh_occ variable * 
*****************************************************************************
* Keep an untouched copy of the raw head-of-household occupation codes.
clonevar headofhh_occ2 = headofhh_occ

* Recode missing/skip codes: -4 on who R lived with at 15; -4 and 995
* (occ not reported or skipped) on head-of-household occupation.
replace person_Rlivedwithage15 = . if person_Rlivedwithage15 == -4 
replace headofhh_occ = . if inlist(headofhh_occ, -4, 995) 

* Father occupation: the head-of-household occupation, except when R only
* lived with his mom, had some other arrangement (NOT living with a father or
* step-father), lived on his own, or the living arrangement is missing.
* NOTE(review): a missing living arrangement is excluded here, whereas the
* father-education section later keeps those cases -- confirm this is intended.
gen dad_occ = headofhh_occ if !inlist(person_Rlivedwithage15, 5, 7, 8, .) 
tab dad_occ, m 

* Mother occupation: the head-of-household occupation only when code 5 applies.
gen mom_occ = headofhh_occ if person_Rlivedwithage15 == 5 
tab mom_occ, m 

*********************************************** 
* Demographics * 								
*********************************************** 

***************** 
* Gender *		  
***************** 

* Every respondent in this cohort is male, so sex is a labelled constant.
gen sex = 1 
label define sex_l 1 "Male" 
label values sex sex_l 
tab sex, missing 

* Age: keep respondents aged 50 or younger (ages appear to be 45 and up).
* Observations with missing age are removed as well, because a missing value
* does not satisfy the condition.
keep if age <= 50 
gen agesq = age^2 

****************** 
* Marital Status * 
****************** 

replace marital_status = . if marital_status == -4 

* Married: status codes 1 or 2 (missing when the status itself is missing).
gen married = inlist(marital_status, 1, 2) if !missing(marital_status) 
tab married, m 

label var married "Respondent married" 

* One indicator per remaining status code, created in the listed order.
local statuses "never_married widowed divorced separated"
local codes    "6 3 4 5"
forvalues i = 1/4 {
	local nm : word `i' of `statuses'
	local cd : word `i' of `codes'

	gen `nm' = (marital_status == `cd') if !missing(marital_status) 
	tab `nm', m 
}

****************** 
* Race *		   
****************** 

label var race "Respondent race" 

* Code 3 is set to missing, leaving 1 = white and 2 = Black.
/*Note: Unable to tell if Hispanic/Latinx individuals 
        were coded as "white" or "other". */
replace race = . if race == 3 
tab race, m 

* Black indicator (missing when race is missing).
gen black = (race == 2) if !missing(race) 
tab black, m 
label var black "Black" 
	
*Note: Father race is not available.

****************** 
* Foreign Born *   
****************** 
replace Rmove_sincebirth = . if Rmove_sincebirth == -4 

* Respondent: foreign born when the move-since-birth variable equals 7.
gen foreignborn = (Rmove_sincebirth == 7) if !missing(Rmove_sincebirth) 
tab foreignborn, m 

* Restrict the sample to respondents not flagged as foreign born; cases with
* unknown status are retained.
drop if foreignborn == 1 

* R's father: flagged foreign when his birthplace code is anything other than 1.
replace Rfather_birthplace = . if Rfather_birthplace == -4 

gen fatherforeign = (Rfather_birthplace != 1) if !missing(Rfather_birthplace) 
tab fatherforeign, m 

*-------------------------------------------------------------------------------------------------* 
*-------------------------------------------------------------------------------------------------* 

************************************************************************************* 
* State/Region Where Respondent Grew Up or Was Born * 								  
************************************************************************************* 

*********************** 
* State R was born in * 
*********************** 

*not available 

************************ 
* Region R was born in * 
************************ 

*not available 

****************************************** 
* Whether R was born in the south *        
****************************************** 

* Born in the South: move-since-birth code 5, or codes 1-2 combined with
* region_residence equal to 1. Missing when the move variable is missing.
gen bornsouth = (Rmove_sincebirth == 5 | (inrange(Rmove_sincebirth, 1, 2) & region_residence == 1)) if !missing(Rmove_sincebirth) 
tab bornsouth, m 

* Whether R has moved region since birth: move-since-birth codes above 2.
gen moved_region = (Rmove_sincebirth > 2) if !missing(Rmove_sincebirth) 
tab moved_region, m 
label variable moved_region "R has moved region since birth" 

***************************************** 
* State/Region Where Respondent Grew Up * 
***************************************** 

*not available 

****************************************** 
* Region R currently resides in * 		   
****************************************** 

/*Note: Current region of residence is not available.
	    Can only tell whether someone currently resides 
	    in the South.
*/
* The residence variable only identifies the South, so name it accordingly.
rename region_residence south_residence 
tabulate south_residence, missing 
label variable south_residence "R currently resides in the South" 

*-------------------------------------------------------------------------------------------------* 
*-------------------------------------------------------------------------------------------------* 

***************** 
* Employment *   
***************** 

* Employed: 1966 employment-status codes 1 or 2.
* NOTE(review): no missing-value guard here, so a missing or negative status
* is coded 0 for employed and laborforce -- confirm this is intended.
gen employed = inlist(employment_status1966, 1, 2) 
tab employed, m 

* In the labor force: status codes 1 through 3.
gen laborforce = (employment_status1966 >= 1 & employment_status1966 <= 3) 
tab laborforce, m 

* Self-employed: class-of-worker code 4 (missing when class of worker is missing).
gen selfemployed = (class_wkr == 4) if !missing(class_wkr) 
tab selfemployed, m 

************************* 
* Education--Respondent * 
************************* 

replace higrade_attended = . if higrade_attended == -4 
replace Rcomplete_higrade_attended = . if Rcomplete_higrade_attended == -4 

* Years of school = highest grade attended, less one year when the respondent
* attended that grade but did not complete it.
gen yrsschool = higrade_attended if !missing(higrade_attended) 
replace yrsschool = yrsschool - 1 if Rcomplete_higrade_attended == 0 

label var yrsschool "Years of school (highest grade completed, R)" 
tab yrsschool, m nol 

* Binned years of school. Conditions are tested from the top bin down, so the
* result matches a bottom-up chain of replace statements; values matching no
* condition (including missing) stay missing.
gen yrsschool_bin = cond(yrsschool >= 16 & yrsschool < 20, 16, ///
                    cond(yrsschool > 12 & yrsschool < 16,  14, ///
                    cond(yrsschool == 12,                  12, ///
                    cond(yrsschool > 8 & yrsschool < 12,   10, ///
                    cond(yrsschool == 8,                    8, ///
                    cond(yrsschool > 0 & yrsschool < 8,     6, ///
                    cond(yrsschool == 0,                    0, .))))))) 
label var yrsschool_bin "Years of school, binned" 

* Education categories, again tested from the top category down.
gen eduR = cond(yrsschool > 15 & yrsschool < .,  6, /// 4 or more years of college (like BA)
           cond(yrsschool > 12 & yrsschool < 16, 5, /// 1-3 years of college
           cond(yrsschool == 12,                 4, /// 4 years of HS
           cond(yrsschool > 8 & yrsschool < 12,  3, /// some HS
           cond(yrsschool == 8,                  2, /// completed 8th grade
           cond(yrsschool >= 1 & yrsschool < 8,  1, /// some grade school
           cond(yrsschool == 0,                  0, .))))))) 
tab eduR, m 

* Attainment dummies (missing when eduR is missing).
gen hs_ed = (eduR >= 4) if !missing(eduR) 
tab hs_ed, m 

gen coll_ed = (eduR >= 6) if !missing(eduR) 
tab coll_ed, m 

label var hs_ed "HS educated" 
label var coll_ed "Coll educated" 

********************** 
* Education--Parents * 
********************** 

*Mother education--not available
/*Note: The only way to observe the education of R's father 
        is a variable that asks the highest grade completed 
        by head of household when R was 15. In order to ensure
        that only Rs with a father (or stepfather) as their head of hh receive
        non-missing values for yrsschool_dad, the variable 
        person_Rlivedwithage15 will be used in combination with 
        headofhh_higrade. */

replace headofhh_higrade = . if headofhh_higrade == -4 

* Father's years of school: the head-of-household grade, unless the reported
* living arrangement at 15 has a code of 5 or higher (head of hh was not R's
* father or stepfather).
/* Note: When no info is reported about who R lived with at age 15, 
         it is assumed that R lived with a father or step-father. 
         This assumption is only applied to 28 observations.
*/
gen yrsschool_dad = headofhh_higrade if !(person_Rlivedwithage15 >= 5 & person_Rlivedwithage15 < .) 
tab yrsschool_dad, m 

*categorical 
gen edu_dad = . 
replace edu_dad = 0 if yrsschool_dad == 0                      /*none*/ 
replace edu_dad = 1 if inrange(yrsschool_dad, 1, 7)            /*some grade school*/ 
replace edu_dad = 2 if yrsschool_dad == 8                      /*completed 8th grade*/ 
replace edu_dad = 3 if inrange(yrsschool_dad, 9, 11)           /*some HS*/ 
replace edu_dad = 4 if yrsschool_dad == 12                     /*4 years of HS*/ 
replace edu_dad = 5 if inrange(yrsschool_dad, 13, 15)          /*some college*/ 
replace edu_dad = 6 if yrsschool_dad >= 16 & yrsschool_dad != . /*college*/ 
label var edu_dad "Educational categories for dad" 
tab edu_dad, m 

* binned: category 0 maps to 0 years; categories 1-6 map to 6, 8, 10, 12, 14
* and 16 years, i.e. 4 + 2*category.
gen edu_dad_bin = cond(edu_dad == 0, 0, 4 + 2*edu_dad) if !missing(edu_dad) 
tab edu_dad_bin, m 
label var edu_dad_bin "Years of school from bins" 

* Attainment dummies (missing when edu_dad is missing).
gen dad_hs_ed = (edu_dad >= 4) if !missing(edu_dad) 
tab dad_hs_ed, m 

gen dad_coll_ed = (edu_dad >= 6) if !missing(edu_dad) 
tab dad_coll_ed, m 

label var dad_hs_ed "Dad HS educated" 
label var dad_coll_ed "Dad College educated" 

*************
* Siblings *
*************

*not available

*****************
* Own Fertility *     
*****************

* # (living) boys--not available 
* Flag indeterminate (high) # of boys--not available
* # (living) girls--not available 
* Flag indeterminate (high) # of girls--not available

* # of living kids--coded up earlier in this file; only a label is added here.
label variable R_numkids_living "# of R's kids who are living (R0050000)"

* # of kids ever--not available. 
* # of deceased kids--not available
* Dummy: has R ever had kids--not available 

* Dummy: does R have kids right now (any nonzero count of living kids;
* missing when the count is missing).
gen R_kids_now = !(R_numkids_living == 0) if !missing(R_numkids_living) 
tab R_kids_now, m 

*********************************
* # of persons in R's household *
*********************************

* # of kids (of any age) living in R's hh--already coded up
label variable R_totnumkids_livinginhh "Total # of kids (of any age) living in R's hh (R0050100)"

* # of children 0-17 living in R's hh--already coded up
label variable R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh"

* Total # of people living in R's household (note: spouse is included in the
* relationshiptoR_fam variables). For each of the 20 roster slots, recode the
* valid-skip code (-4) to missing and flag the slot as occupied when a
* relationship is recorded.
forvalues slot = 1/20 {
	tab relationshiptoR_fam`slot', m
	replace relationshiptoR_fam`slot' = . if relationshiptoR_fam`slot' == -4
	tab relationshiptoR_fam`slot', m

	gen Fam`slot'_liveswithR = (relationshiptoR_fam`slot' != .)
	tab Fam`slot'_liveswithR, m
}

* Household size excluding R = number of occupied roster slots.
egen R_hhsize_minusR = rowtotal(*_liveswithR), missing
tab R_hhsize_minusR, m

* Add 1 for R himself: "self" is never an option in the relationship-to-
* respondent variables used to construct hh size.
gen R_hhsize_plusR = 1 + R_hhsize_minusR 
tab R_hhsize_plusR, m 

label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"	
	

****************** 
* Unions *         
****************** 

*not available in 1966 

******************* 
* Veterans *        
******************* 

*Unavailable

/*------------------------------------------------------------------------------------------

Crosswalking 1966 NLS occupations (1960 Census codes) to 1950 ANES occupations for fathers and mothers

------------------------------------------------------------------------------------------*/

*********************** 
*		      * 
*    Crosswalking     * 
* 		      * 
*********************** 

* Attach the crosswalked occupation code for each parent in turn. The
* crosswalk supplies fatheroccej; for the mother pass it is renamed to
* motheroccej in a temporary copy so the father's value is not overwritten.
foreach parent in dad mom {
	* Merge key: the parent's occupation under the name used by the crosswalk.
	gen census1960 = `parent'_occ 
	label var census1960 "==`parent'_occ (renamed to facilitate a merge)" 

	* Stage a copy of the crosswalk without disturbing the data in memory.
	tempfile crossw
	preserve
		use "../Crosswalks/Crosswalk_1960Census_toANES.dta", clear
		if "`parent'" == "mom" {
			rename fatheroccej motheroccej
		}
		save `crossw'
	restore

	* Keep master rows only (crosswalk-only rows are discarded); every
	* unmatched master row must have a missing occupation code.
	sort census1960 
	merge m:1 census1960 using `crossw', keep(master match) 
	assert census1960 == . if _merge == 1

	drop _merge census1960 
}
	
/*Following the coding in other surveys: 
--headofhh_father =1 if R lived with both parents when growing up or if R reports having lived with only a father. 
--headofhh_mother =1 if R lived with a mother but not a father. 
--headofhh_othermale =1 if R lived with a male relative and not with R's parents.
--headofhh_otherfemale =1 if R lived with a female relative and not with R's parents or a male relative. 
*/

/*Note: In keeping with other surveys (e.g., NLSMW), respondents who answered 
        "on their own" or "other arrangement" will be assigned to "0" in the 
        dummies. */		

* Collapsed living-arrangement variable: codes 7 and 8 -> 0, codes 2, 3 and 4 -> 1.
* The full variable name is spelled out (the earlier abbreviated reference
* only worked while Stata's variable-abbreviation setting was on).
gen abr_person_Rlivedwithage15 = person_Rlivedwithage15
replace abr_person_Rlivedwithage15 = 0 if inlist(abr_person_Rlivedwithage15, 7, 8) 
replace abr_person_Rlivedwithage15 = 1 if inlist(abr_person_Rlivedwithage15, 2, 3, 4)
label define hhhead_Rage15 0 "OTHER ARRANGEMENT; ON HIS OWN" 1 "FATHER (STEP, BIO, SINGLE)" 5 "MOTHER" 6 "MALE RELATIVE"
label values abr_person_Rlivedwithage15 hhhead_Rage15

tab abr_person_Rlivedwithage15

* Build each dummy from its explicit category code rather than from the
* position of the category in a tabulation, so the mapping stays correct even
* if a category happens to be empty. Dummies are missing when the living
* arrangement is missing.
gen byte headofhh_father    = (abr_person_Rlivedwithage15 == 1) if abr_person_Rlivedwithage15 < .
gen byte headofhh_mother    = (abr_person_Rlivedwithage15 == 5) if abr_person_Rlivedwithage15 < .
gen byte headofhh_othermale = (abr_person_Rlivedwithage15 == 6) if abr_person_Rlivedwithage15 < .

* No "female relative" category exists in this survey: the dummy is 0 whenever
* the living arrangement is known, missing otherwise.
gen headofhh_otherfemale = 0
replace headofhh_otherfemale = . if abr_person_Rlivedwithage15 == .

* Sanity check: all four dummies are missing when the arrangement is missing.
foreach name in father othermale otherfemale mother {
	assert headofhh_`name' == . if abr_person_Rlivedwithage15 == . 
}

label var headofhh_father "Head of hh when R was growing up was R's father"
label var headofhh_mother "Head of hh when R was growing up was R's mother"
label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"

//Alternate dummy for father as head of hh during R's childhood.  
/*Note: When R reports occupation of a parent but does not report 
		who R lived with when growing up, will assume that R lived 
		with father.*/
* NOTE(review): dad_occ is set to missing earlier in this file whenever the
* living arrangement is missing, so fatheroccej is presumably missing for
* those rows and this imputation may never apply -- verify.
gen headofhh_father_imputed = headofhh_father
replace headofhh_father_imputed = 1 if fatheroccej != . & abr_person_Rlivedwithage15 == . 
label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 15"
	
*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************

* Inspect the raw head-of-hh occupation codes for rows with no crosswalked
* father occupation (restricted to the listed living-arrangement codes).
tab headofhh_occ2 if fatheroccej==. & inlist(person_Rlivedwithage15,1,2,3,4,6,.), m 

* Note: All missing values for father occupation are because occupation is not
* reported. Unable to tell if R's father was working or not (e.g., retired,
* renter).
* NOTE(review): the original note said missing father occupations would be
* coded 0, but the code sets 0 only when an occupation IS reported and leaves
* the dummy missing otherwise -- confirm which is intended.
gen father_notworking = 0 if fatheroccej != . 
tab father_notworking, m 

* Same inspection for mothers (living-arrangement code 5).
tab headofhh_occ2 if motheroccej==. & person_Rlivedwithage15==5, m 

* Note: All missing values for mother occupation are because occupation is not
* reported. Unable to tell if R's mother was working or not (e.g., retired,
* housewife). The same caveat as for fathers applies: the dummy is 0 when an
* occupation is reported and missing otherwise.
gen mother_notworking = 0 if motheroccej != . 
tab mother_notworking, m 
	
*************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
*************************************************************************
/* Note: There is no variable to see what "class" of work a father did 
         (i.e. gov't, private, self-employed, etc). Unable to adjust 
         father occupation for self-employment.*/

* Variable for father being either farm laborer or operator 
* Farm indicator for the father: 1 for crosswalked occupation codes 71 or 81,
* 0 for any other reported occupation, missing when the occupation is missing.
gen fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej != . 
tab fatherfarm, m 

//Rename to allow crosswalk to be merged again
* (the crosswalk brings in a variable called fatheroccej, so the father's
* value must be moved out of the way first)
rename fatheroccej father_occ1950ej 

* Merge key for the respondent's own occupation.
gen census1960 = R_occupation /*now = adult R occ */ 
label var census1960 "==R_occupation (renamed to facilitate a merge)" 

* The dataset used to be written to a tempfile here, but that file was never
* read back, so the unused save has been removed.
sort census1960 

/*------------------------------------------------------------------------------------------
Crosswalking 1966 NLS occupations to 1950 ANES occupations for sons							
------------------------------------------------------------------------------------------*/ 

*********************** 
*		      * 
*    Crosswalking     * 
* 		      * 
*********************** 

* Negative codes and 995 are not valid occupations; blank them before merging.
replace census1960 = . if census1960 == 995 | census1960 < 0

* Attach the crosswalked code for the respondent's own occupation. Only
* master rows are kept, and every unmatched row must have a missing code.
merge m:1 census1960 using "../Crosswalks/Crosswalk_1960Census_toANES.dta", keep(master match) 
assert census1960 == . if _merge == 1
drop _merge

* Fix occupations for self-employed businessmen, managers, or officials.
* At this point fatheroccej holds the RESPONDENT's crosswalked occupation
* (the father's value was renamed to father_occ1950ej before this merge).
replace fatheroccej = 21 if selfemployed == 1 & fatheroccej == 28 

* Rename to match the variable names in other data files: the respondent's
* code becomes occRej, and the father's code gets the fatheroccej name back.
rename fatheroccej occRej 
rename father_occ1950ej fatheroccej 

/*------------------------------------------------------------------------------------------
	Family Income							
------------------------------------------------------------------------------------------*/ 

* Log of unbinned family income (missing for zero, negative or missing income).
gen lnfaminc_nobin = ln(totfam_inc) 

/*
The midpoint of each bin is assigned, with the exception of:
 (1) the last bin, whose bottom value is multiplied by 1.25 (as last bin is always "open-ended"--i.e. "25,000 or more")
 (2) the bottom bin, whose top value is multiplied by 0.75

Missing-value handling:
 - Stata treats a missing value as larger than any number, so the open-ended
   top bin is explicitly limited to non-missing income; otherwise respondents
   with missing income would be assigned the top-coded value.
 - Negative values are left unbinned (fam_inc stays missing) instead of being
   swept into the bottom bin. This follows the convention used elsewhere in
   this file that negative codes denote refusal / DK / skip / non-interview.
   NOTE(review): confirm that totfam_inc has no genuine negative incomes.
*/ 
gen fam_inc = .  
replace fam_inc = 750        if totfam_inc >= 0     & totfam_inc < 1000 
replace fam_inc = 1500       if totfam_inc >= 1000  & totfam_inc < 2000 
replace fam_inc = 2500       if totfam_inc >= 2000  & totfam_inc < 3000 
replace fam_inc = 3500       if totfam_inc >= 3000  & totfam_inc < 4000 
replace fam_inc = 4500       if totfam_inc >= 4000  & totfam_inc < 5000 
replace fam_inc = 5500       if totfam_inc >= 5000  & totfam_inc < 6000 
replace fam_inc = 7000       if totfam_inc >= 6000  & totfam_inc < 8000 
replace fam_inc = 9000       if totfam_inc >= 8000  & totfam_inc < 10000 
replace fam_inc = 11000      if totfam_inc >= 10000 & totfam_inc < 12000 
replace fam_inc = 13500      if totfam_inc >= 12000 & totfam_inc < 15000 
replace fam_inc = 1.25*15000 if totfam_inc >= 15000 & totfam_inc < . 
tab fam_inc, m 
label var fam_inc "Family income, binned (based on midpoints of each bin)" 

/*Note: The suffix "_son" is used to match the variable names in other datasets. 
        All respondents (i.e., male) are given a value for these variables.*/	
gen bottomcoded_son = fam_inc==750 if fam_inc<. 
tab bottomcoded_son, m 

gen topcoded_son = fam_inc==1.25*15000 if fam_inc<. 
tab topcoded_son, m 

label var bottomcoded_son "Respondent family income, bottom coded" 
label var topcoded_son "Respondent family income, top coded" 

/* Turn fam_inc into 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 */ 
gen CPI1950 = 24.1 
gen CPI1966 = 32.4 

gen fam_inc_real = fam_inc * (CPI1950/CPI1966) 
label var fam_inc_real "Binned Family income, in 1950 dollars" 
	
gen lnfaminc = ln(fam_inc_real) 
label var lnfaminc "Logged family income, binned and real" 

/*------------------------------------------------------------------------------------------
	Birth cohorts						
------------------------------------------------------------------------------------------*/ 
replace year_birth = . if year_birth == -4 

* Drop two-digit birth years below 7 and those strictly between 21 and 25.
* Rows with a missing birth year satisfy neither condition and are kept.
drop if (year_birth > 21 & year_birth < 25) | year_birth < 7 

gen year = 1966  
label var year "Survey year" 

*Note: "year_birth" values are last two values of calendar year 
gen dob = year_birth + 1900 
tab dob, m 
label var dob "Year of birth" 

* Assign everyone the decade in which they were born (1900s, 1910s or 1920s);
* birth years outside 1900-1929, or missing, leave decade missing.
gen decade = . 
forvalues d = 1900(10)1920 {
	replace decade = `d' if inrange(dob, `d', `d' + 9) 
}
tab decade, m 
label var decade "Decade of birth" 
	
* Generate dummies for each decade 
tab decade, gen(decade_) 

/*------------------------------------------------------------------------------------------
	Interactions			
------------------------------------------------------------------------------------------*/ 

global institution_list "black hs_ed coll_ed" 

* Demean the variables that we will use: for each one, subtract its
* (unweighted) sample mean and carry the original label over with a
* ", demeaned" suffix.
foreach v of global institution_list { 
	summarize `v' 
	gen `v'_dm = `v' - `r(mean)' 

	local lbl : variable label `v' 
	label variable `v'_dm "`lbl', demeaned" 
} 
	
/*------------------------------------------------------------------------------------------
   Save			
------------------------------------------------------------------------------------------*/ 

* Flag respondents without a (real, binned) family income.
gen faminc_missing = missing(fam_inc_real) 
label variable faminc_missing "Family income missing" 

* The respondent ID serves as the unique identifier; the report below is
* expected to show no duplicates.
label variable id_nlsom "R ID (unique identifier)" 
duplicates report id_nlsom 

* Final tidy-up: give the weight its cohort-specific name, put ID and weight
* first, shrink storage types, sort by ID and write the cleaned file.
rename sampling_wght_nls weight_nlsom 
order id_nlsom weight_nlsom 
compress 
sort id_nlsom 
save ./output/nls_older_cleaned, replace 

